import os
import pandas as pd

# Sample fixture: a tiny housing table whose "NA" cells are parsed by pandas
# as missing values. Columns: NumRooms, Alley, Test, Price.
_SAMPLE_ROWS = (
    'NumRooms,Alley,Test,Price\n',
    'NA,Pave,NA,127500\n',
    '2,NA,A,106000\n',
    '4,NA,NA,178100\n',
    'NA,NA,B,14000\n',
)

# Make sure the data directory exists before touching the CSV file.
os.makedirs(os.path.join('.', 'data'), exist_ok=True)
data_file = os.path.join('.', 'data', 'house_tiny.csv')

# Create the sample CSV only when it is missing. Mode 'x' fails if the file
# already exists, so a user-edited file is never overwritten, while a fresh
# checkout no longer crashes with FileNotFoundError in read_csv below.
try:
    with open(data_file, 'x', encoding='utf-8') as sample_file:
        sample_file.writelines(_SAMPLE_ROWS)
except FileExistsError:
    pass  # An existing data file takes precedence over the built-in sample.

# Load the CSV data into a DataFrame.
data = pd.read_csv(data_file)
print("\nCSV => \n", data)

# Count the missing values in each column.
res_null = pd.isnull(data).sum()
print("\nres_null => \n", res_null)

#
# # Split data into inputs and outputs by positional index (iloc)
# inputs, outputs = data.iloc[:, 0:3], data.iloc[:, 3]
# # Handle missing values by imputation: fill NaN with the mean of the column
# inputs = inputs.fillna(inputs.mean())
# print("\ninputs.fillna => \n", inputs)
#
# print(inputs.dropna())
#
# # Convert discrete categorical values into one-hot encoded columns
# inputs = pd.get_dummies(inputs, dummy_na=True)
# print("\none-hot => \n", inputs)
#
# import paddle
#
# # Convert to tensor format
# x, y = paddle.to_tensor(inputs.values), paddle.to_tensor(outputs.values)
# print("\n to_tensor => \n", x, y)
